** Reset the Stata session before running the script
* Close a log if one is open; capture suppresses the error when none is
capture log close
* Remove data and other objects held in memory
clear all
* Turn off the -more- pause and store that preference permanently
set more off, permanently
* Fix the random-number seed
set seed 1234

///////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Figure O.3: Binscatter plots, raw P1 and P2 scores on a 10-point scale /////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////

** Open the Phase 2 dataset
use "Work Data/Gender_Phase2_long.dta", clear

*** Creating variables
* Numeric, value-labelled copy of the string variable subject
encode subject, gen(sub)
label var sub "Subject"
* One 0/1 indicator per distinct value of subject: d_sub1, d_sub2, ...
tab subject, gen(d_sub)

** Subject dummies
* tabulate, generate() numbers the indicators following the sorted values of subject.
* NOTE(review): the names below assume that sorted order is Biology, Chemistry,
* Geography, History, Language, Mathematics, Physics, Portuguese - confirm against
* the tabulation printed above.
rename d_sub1 Biology
rename d_sub2 Chemistry
rename d_sub3 Geography
rename d_sub4 History
rename d_sub5 Language
rename d_sub6 Mathematics
rename d_sub7 Physics
rename d_sub8 Portuguese

* Labels
* Full variable names are used throughout, so the labels do not depend on
* variable-name abbreviation being enabled.
label var Biology "Biology"
label var Chemistry "Chemistry"
label var Geography "Geography"
label var History "History"
label var Mathematics "Mathematics"
label var Physics "Physics"
label var Portuguese "Portuguese"
label var Language "Foreign Language"

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Keep only the years before the affirmative action took place
*    (year 2000 is removed as well); the year distribution is shown after each step
drop if aa_year==1
tabulate year
drop if year==2000
tabulate year

* 2) Drop Portuguese and Foreign Language: Phase 1 has no exam in either subject
*    (for Portuguese, Phase 1 has an essay). The mean of norm_p1score by subject
*    is shown before and after the drop.
tabulate subject, summarize(norm_p1score)
drop if inlist(subject, "lang", "port")
tabulate subject, summarize(norm_p1score)
* The indicators of the two removed subjects are no longer needed
drop Language Portuguese

*********************************************************************************
**************** Descriptive analysis *******************************************
*********************************************************************************

* Summary statistics of the two raw scores
summarize score p1score
* Rescaled copy of score (score divided by 6)
* NOTE(review): presumably this puts score on the same 10-point scale as p1score - confirm
generate score6 = score / 6
summarize score6 p1score


***************************
* Binscatter P1 and P2 
***************************

* One binscatter of score6 (P2) against p1score (P1) per subject, each saved as a
* .gph file, then a combined six-panel figure exported to PDF.

* The subjects are listed explicitly instead of through a positional range of
* variables, so the loop does not depend on the column order of the dataset or
* on which indicators were dropped earlier.
local subjects Biology Chemistry Geography History Mathematics Physics

* Quoted file names of the saved panels, collected while looping so that the
* combined figure always contains exactly the panels that were drawn.
local panels
foreach v of local subjects {
    binscatter score6 p1score if `v'==1, ///
        ylabel(0(1)10) xlabel(0(1)10) ///
        ytitle("P{sub:2}") xtitle("P{sub:1}") title("`v'") ///
        graphregion(color(white)) bgcolor(white) legend(off) ///
        saving("Output/score6_p1score_all_`v'.gph", replace)
    local panels `"`panels' "Output/score6_p1score_all_`v'.gph""'
}

* ycommon gives every panel the same y-axis scale
graph combine `panels', ycommon
graph export "Output/score6_p1score_all.pdf", replace
